This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Load the two CSV exports and echo each data frame so it renders beneath
# the chunk.
df <- read.csv(
  file = "/Users/jialincheoh/analysis-2017/spring-2017/script/analysis/overall_phase5_bogota3.csv",
  header = TRUE,
  sep = ","
)
df
df0 <- read.csv(
  file = "/Users/jialincheoh/analysis-2017/spring-2017/script/analysis/overall_phase5_bogota0.csv",
  header = TRUE,
  sep = ","
)
df0

library(car)

# Added-variable plots for len_unique regressed on add.loc.45 and
# novelty.phase5.
avPlots(lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df))

# Single-predictor OLS fit; stored as `model` because later chunks run their
# diagnostics on this object.
model <- lm(formula = len_unique ~ add.loc.15, data = df)
summary(model)
Call:
lm(formula = len_unique ~ add.loc.15, data = df)
Residuals:
Min 1Q Median 3Q Max
-4872 -4381 -2993 -1457 67216
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4872.22 2851.70 1.709 0.0955 .
add.loc.15 -54.23 69.31 -0.782 0.4387
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 12730 on 39 degrees of freedom
Multiple R-squared: 0.01546, Adjusted R-squared: -0.009789
F-statistic: 0.6122 on 1 and 39 DF, p-value: 0.4387
# Standard lm diagnostic plots for the OLS fit.
plot(model)

# Robust linear model (MASS::rlm) of the same formula, using the bisquare
# psi function; stored as `r`.
library(MASS)
r <- rlm(
  formula = len_unique ~ add.loc.15,
  data = df,
  psi = psi.bisquare
)
summary(r)
Call: rlm(formula = len_unique ~ add.loc.15, data = df, psi = psi.bisquare)
Residuals:
Min 1Q Median 3Q Max
-78.43 -18.90 14.69 212.65 71834.50
Coefficients:
Value Std. Error t value
(Intercept) 33.2673 12.8966 2.5795
add.loc.15 0.8092 0.3135 2.5814
Residual standard error: 50.49 on 39 degrees of freedom
# Confidence intervals for the coefficients of the robust fit `r`, computed
# by the default (normal-approximation) method at the default 95% level.
confint.default(r)
2.5 % 97.5 %
(Intercept) 7.990445 58.54406
add.loc.15 0.194786 1.42353
# Weighted least squares fit of len_unique on add.loc.15:
#   1. fit plain OLS (WLS.mod);
#   2. regress the absolute OLS residuals on add.loc.15 and take the fitted
#      values as an estimate of the residual spread at each observation;
#   3. refit with weights 1 / (estimated spread)^2 (WLS.mod2).
# NOTE(review): fitted spreads close to zero would produce very large
# weights — worth checking range(wts1) before trusting the fit.
WLS.mod <- lm(len_unique ~ add.loc.15, data = df)
wts1 <- 1 / fitted(lm(abs(residuals(WLS.mod)) ~ add.loc.15, data = df))^2
# `weights` is spelled out in full: the former `weight=` only reached lm()'s
# `weights` argument through partial argument matching.
WLS.mod2 <- lm(len_unique ~ add.loc.15, data = df, weights = wts1)
summary(WLS.mod2)
Call:
lm(formula = len_unique ~ add.loc.15, data = df, weights = wts1)
Weighted Residuals:
Min 1Q Median 3Q Max
-0.7300 -0.4972 -0.4910 -0.4543 8.5424
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4144.94 2330.79 1.778 0.0831 .
add.loc.15 -45.37 26.62 -1.705 0.0962 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.791 on 39 degrees of freedom
Multiple R-squared: 0.06934, Adjusted R-squared: 0.04547
F-statistic: 2.906 on 1 and 39 DF, p-value: 0.09623
# 95% confidence intervals (the default level) for the coefficients of the
# weighted fit WLS.mod2.
confint(WLS.mod2)
2.5 % 97.5 %
(Intercept) -569.51797 8859.399118
add.loc.15 -99.21131 8.467861
# Residual plots for `model`, together with car's per-term test statistics
# and the Tukey test.
# NOTE(review): the output recorded beneath this call lists add.loc.45 and
# novelty.phase5, but `model` as assigned earlier in this file is
# len_unique ~ add.loc.15. The recorded output looks stale — confirm which
# model these diagnostics are meant for and re-run.
residualPlots(model)
Test stat Pr(>|Test stat|)
add.loc.45 0.1533 0.8790
novelty.phase5 -0.6820 0.4995
Tukey test -0.6524 0.5142
# Shapiro-Wilk normality test on the residuals of `model`. The residual
# vector is kept in `lmLength` because the Q-Q plot calls that follow use it.
lmLength <- resid(model)
shapiro.test(x = lmLength)
Shapiro-Wilk normality test
data: lmLength
W = 0.42786, p-value = 1.885e-11
# Normal Q-Q plot of the residuals held in lmLength, with a reference line.
qqnorm(lmLength)
qqline(lmLength)
library(car)
# Score test for non-constant error variance of `model` (car::ncvTest).
ncvTest(model)
Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 15.92375, Df = 1, p = 6.5946e-05
# Cook's distance plot (lm diagnostic panel 4) for the two-predictor model,
# drawn with red filled-diamond markers.
plot(
  lm(len_unique ~ add.loc.45 + novelty.phase5, data = df),
  which = 4,
  pch = 18,
  col = "red"
)
# 20th percentile of the F distribution with 3 and 38 degrees of freedom.
qf(p = 0.2, df1 = 3, df2 = 38)
[1] 0.3351188
# Median (50th percentile) of the F distribution with 3 and 38 degrees of
# freedom.
# NOTE(review): presumably a reference cutoff for the Cook's distances
# plotted above — confirm.
qf(0.5, 3, 38)
[1] 0.803003
# Box plots, scatter-plot matrix and correlation matrix for the three
# variables of interest. The columns are selected by name instead of by
# position (previously 6, 66 and 144) so the selection does not silently
# change if the CSV's column order does. The names are the ones shown in the
# correlation matrix recorded beneath this chunk.
key_vars <- c("novelty.phase5", "add.loc.15", "len_unique")
boxplot(df[key_vars])
plot(df[key_vars])
cor(df[key_vars])
novelty.phase5 add.loc.15 len_unique
novelty.phase5 1.0000000 0.2923436 -0.1709687
add.loc.15 0.2923436 1.0000000 -0.1243208
len_unique -0.1709687 -0.1243208 1.0000000
# Pairwise correlation-test p-values for the columns of `x`.
#
# Args:
#   x: a data frame or matrix with column names; every pair of columns is
#      passed to cor.test().
#
# Returns:
#   A square numeric matrix with one row and one column per column of `x`
#   (both labelled with the column names); entry [i, j] is the p-value of
#   cor.test() applied to columns i and j.
cor.test.p <- function(x) {
  vars <- colnames(x)
  # p-value of the correlation test between the two named columns
  pair_p <- function(i, j) cor.test(x[, i], x[, j])[["p.value"]]
  # outer() hands over whole vectors of names, so the scalar helper is
  # wrapped in Vectorize()
  z <- outer(vars, vars, Vectorize(pair_p))
  dimnames(z) <- list(vars, vars)
  # Return the matrix explicitly: ending on the `dimnames<-` assignment made
  # the function invisibly return the dimnames list, not the p-values.
  z
}
# Apply cor.test.p to columns 6, 66 and 144 of df.
cor.test.p(x = df[c(6, 66, 144)])
library(car)
# Influence plot for the two-predictor model.
influencePlot(
  lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df)
)
# Load the "_copy" variant of the bogota3 CSV and echo it.
df1 <- read.csv(
  file = "/Users/jialincheoh/analysis-2017/spring-2017/script/analysis/overall_phase5_bogota3_copy.csv",
  header = TRUE,
  sep = ","
)
df1

library(car)

# Added-variable plots for the two-predictor model on df1.
avPlots(lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df1))

# Same two-predictor OLS fit, stored as `model1` for the diagnostics below.
model1 <- lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df1)
summary(model1)
Call:
lm(formula = len_unique ~ add.loc.45 + novelty.phase5, data = df1)
Residuals:
Min 1Q Median 3Q Max
-2837 -2228 -1048 -577 36934
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2836.786 1931.524 1.469 0.150
add.loc.45 -1.617 7.668 -0.211 0.834
novelty.phase5 -23.710 31.919 -0.743 0.462
Residual standard error: 6519 on 37 degrees of freedom
Multiple R-squared: 0.01692, Adjusted R-squared: -0.03622
F-statistic: 0.3184 on 2 and 37 DF, p-value: 0.7293
# Standard lm diagnostic plots for model1.
plot(model1)
# Shapiro-Wilk normality test on model1's residuals. This reuses the name
# `lmLength`, overwriting the residual vector stored there earlier.
lmLength <- resid(model1)
shapiro.test(x = lmLength)
Shapiro-Wilk normality test
data: lmLength
W = 0.35354, p-value = 4.553e-12
# Normal Q-Q plot of the residuals held in lmLength, with a reference line.
qqnorm(lmLength)
qqline(lmLength)
library(car)
# Score test for non-constant error variance of model1 (car::ncvTest).
ncvTest(model1)
Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 21.96367, Df = 1, p = 2.7786e-06